Series de tiempo

De entre las series de tiempo para cada enfermedad en el periodo descrito, no todas contienen información de casos reportados durante el periodo de tiempo establecido para la investigación, por lo que se extraen los que reportan al menos la mitad del periodo (260 semanas)

In [142]:
# https://stackoverflow.com/a/16916611
print('Iniciales {}'.format(len(cie))) 
cie = cie.filter(lambda x: x['sem'].count() >= 260 )
cie.reset_index(drop=True, inplace=True)
cie = cie.groupby('cie')
print('Restantes {}'.format(len(cie)))
Iniciales 138
Restantes 40
In [143]:
print('Iniciales {}'.format(len(cieG))) 
cieG = cieG.filter(lambda x: x['sem'].count() >= 260 )
cieG.reset_index(drop=True, inplace=True)
cieG = cieG.groupby(cieG.cie.str[0])
print('Restantes {}'.format(len(cieG)))
Iniciales 22
Restantes 12

Así, de 138 series de tiempo de enfermedades, se obtienen 40 en las que al menos se cuenta con datos semanales de 5 años. Para dichas enfermedades se obtienen los pesos de la regresión lineal y se obtiene la serie de tiempo sin la tendencia y las autocorrelaciones (eliminando la a92.3 porque viene vacía)

In [490]:
from scipy import signal
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.stattools import acf

ciesF = [] # CIEs Características
ciesTSt = [] # CIEs Series de tiempo

for name, group in cie:
    if name == 'a92.3':
        continue
    
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.detrend.html
    detrended = signal.detrend(group.casos)
    
    a, b, r, p, e = stats.linregress(group['sem'], group.casos)
    print("y = f(x) = {} x + {}".format(a, b))
    print("error", e)
    print("p = ", p)
    print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
    print("R^2", r**2)
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos)
    plt.plot(group['sem'], detrended, c='black')
    plt.plot(group['sem'], (a * group['sem'] + b), label = 'y = {:.1f}x + {:.0f}'.format(a, b), color = 'red', linewidth = 3)
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Casos normalizados")
    plt.show()
    
    # https://stackoverflow.com/questions/48497756/time-series-distance-metric
    plt.figure(figsize=(12, 2))
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.cumsum.html
    plt.plot(group['sem'], group.casos.cumsum(), c='green')
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Acumulado de Casos normalizados")
    plt.show()
    
    # https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/
    plot_acf(detrended, lags=52)
    # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
    plt.title(name)
    plt.xlabel("Retraso en semanas")
    plt.ylabel('Correlación')
    plt.show()
    
    temp = [a, b]
    # https://stackoverflow.com/a/3748071
    temp.extend(acf(detrended, nlags=52))
    temp.append(name)
    ciesF.append(temp)
    
    temp2 = list(group.casos)
    #temp2.append(name)
    ciesTSt.append(temp2)
y = f(x) = 4.2687498879775535e-11 x + -5.61276490864389e-09
error 2.0113605583063735e-11
p =  0.03438325581830106
pendiente significativa
R^2 0.010390302941758996
y = f(x) = -1.7653075450894884e-06 x + 0.0019754909758688734
error 1.215881678974069e-07
p =  1.0774404716157897e-37
pendiente significativa
R^2 0.3765563116502911
y = f(x) = -3.0752338979568535e-09 x + 1.2904965711605356e-05
error 8.36749265385657e-10
p =  0.0002687345236732541
pendiente significativa
R^2 0.03144815483703095
y = f(x) = -1.0785666679174943e-08 x + 6.155839207125693e-06
error 3.564598970127455e-10
p =  3.9269145731807006e-112
pendiente significativa
R^2 0.6622135065357393
y = f(x) = -9.213574424729245e-09 x + 1.2957954379028175e-05
error 6.75300549213463e-10
p =  4.863591366039641e-36
pendiente significativa
R^2 0.2798612445705242
y = f(x) = -3.831012274635742e-09 x + 4.559110665311131e-06
error 2.056660576324692e-10
p =  2.1083769747172313e-58
pendiente significativa
R^2 0.4247090552255535
y = f(x) = -1.4627725963638185e-11 x + 4.5153183905681134e-08
error 8.6165080095987e-12
p =  0.09023493838847157
pendiente no significativa
R^2 0.006068835261027791
y = f(x) = -1.7642681104091792e-10 x + 6.894953499860091e-07
error 6.403728082254567e-11
p =  0.0061121040534305735
pendiente significativa
R^2 0.01699628644380042
y = f(x) = 3.8938781393720075e-11 x + 4.4324399670136904e-08
error 2.301537019363991e-11
p =  0.09131901816285937
pendiente no significativa
R^2 0.00590350974760678
y = f(x) = 2.3434709729609935e-13 x + -3.6003147872535345e-11
error 1.478629682740248e-13
p =  0.1136746875979033
pendiente no significativa
R^2 0.005407586602284264
y = f(x) = -3.4234098194666664e-11 x + 1.8622107713377432e-08
error 7.415007265413988e-12
p =  4.945787391949576e-06
pendiente significativa
R^2 0.040422588588555235
y = f(x) = 2.725400812299939e-10 x + 1.9471946148640167e-08
error 7.110945660238779e-11
p =  0.00014458146666075644
pendiente significativa
R^2 0.031274848966871695
y = f(x) = -4.255464112013022e-09 x + 2.7470903702510135e-06
error 3.3548559373967543e-10
p =  6.366969283108536e-32
pendiente significativa
R^2 0.25462455709847065
y = f(x) = -1.0744562226275755e-10 x + 5.3269706349552e-08
error 6.010845348981309e-11
p =  0.07452781641973526
pendiente no significativa
R^2 0.007081769917450651
y = f(x) = -4.51442956629053e-11 x + 5.003109663849265e-08
error 8.825884482273087e-12
p =  4.625709147491021e-07
pendiente significativa
R^2 0.05414963353204715
y = f(x) = -2.8757621426513095e-09 x + 8.65805695809489e-06
error 2.5195362618720955e-09
p =  0.25425697912167106
pendiente no significativa
R^2 0.002603944949826924
y = f(x) = -1.4644681914017878e-07 x + 0.00011211051217541277
error 1.1228826202874234e-08
p =  2.0750705342062423e-33
pendiente significativa
R^2 0.2653193213169377
y = f(x) = -2.722125710622442e-10 x + 1.262294141156984e-07
error 1.588658069624221e-11
p =  7.30165010543899e-51
pendiente significativa
R^2 0.3985877970692943
y = f(x) = -7.433434732564669e-09 x + 6.901511580636186e-06
error 4.809110869935852e-10
p =  3.1523565347787724e-42
pendiente significativa
R^2 0.3841635208468614
y = f(x) = -1.2411343091809244e-10 x + 2.1160165305354125e-07
error 2.468137006599137e-11
p =  7.217507598355145e-07
pendiente significativa
R^2 0.05458192227303572
y = f(x) = 4.306189172696877e-11 x + 4.5311776302657916e-07
error 4.983732447188263e-11
p =  0.3880813876059074
pendiente no significativa
R^2 0.0018723188046217538
y = f(x) = -9.851452950958728e-10 x + 1.4372871687382767e-06
error 2.0422528814979408e-10
p =  2.004159852045715e-06
pendiente significativa
R^2 0.05484537751521497
y = f(x) = -4.5977874526241305e-09 x + 2.970356116391645e-06
error 1.2200834000344058e-10
p =  5.731116914266081e-137
pendiente significativa
R^2 0.7713322815069262
y = f(x) = -3.141197049928086e-08 x + 0.00011159382796415527
error 1.555599789385595e-08
p =  0.04432369209037648
pendiente significativa
R^2 0.013065683803978847
y = f(x) = -1.0673850709576887e-11 x + 4.355250762325799e-09
error 3.1880871555405942e-12
p =  0.0008764994132686745
pendiente significativa
R^2 0.02236467960180641
y = f(x) = -1.1834633525276088e-09 x + 7.100912129285122e-07
error 9.393463801467669e-11
p =  1.2559331815198554e-31
pendiente significativa
R^2 0.24928894104283042
y = f(x) = -2.6544241562890327e-10 x + 3.2049521522763566e-07
error 3.7319118941308474e-11
p =  4.610321182741757e-12
pendiente significativa
R^2 0.10270484494415096
y = f(x) = -4.556172046800565e-10 x + 3.7500114147077754e-07
error 3.423510008372152e-11
p =  1.8548314561638924e-34
pendiente significativa
R^2 0.27626166419878756
y = f(x) = -6.932314608726674e-06 x + 0.008805967823835786
error 6.162044690299544e-07
p =  7.503334430143699e-26
pendiente significativa
R^2 0.23113873276962404
y = f(x) = -4.3876585629436324e-08 x + 5.08928603405923e-05
error 4.181979380800392e-09
p =  2.3727736779313174e-23
pendiente significativa
R^2 0.1846709286259034
y = f(x) = 3.0607452607643954e-14 x + 8.632235917582137e-10
error 1.081125720208934e-12
p =  0.977427221531179
pendiente no significativa
R^2 1.8298987789071008e-06
y = f(x) = -7.142618310354189e-10 x + 1.178302593061399e-06
error 1.457944295445013e-10
p =  1.332329783777257e-06
pendiente significativa
R^2 0.04918267323938655
y = f(x) = -1.3158987800860395e-08 x + 1.643445025799908e-05
error 1.6084785128648306e-09
p =  1.0885896084621633e-14
pendiente significativa
R^2 0.19747207526346663
y = f(x) = -5.001283641779608e-08 x + 8.79623291848092e-05
error 5.714736801137942e-09
p =  3.9731738436330694e-17
pendiente significativa
R^2 0.14220420601744926
y = f(x) = -8.130421944150189e-10 x + 1.4827816461880552e-06
error 5.134961166576471e-10
p =  0.11399979312553642
pendiente no significativa
R^2 0.005185003922499099
y = f(x) = -2.3438815029578847e-11 x + 1.5962437413039917e-07
error 5.628337741371575e-11
p =  0.6772793134611802
pendiente no significativa
R^2 0.00037523732655554817
y = f(x) = -5.193298199318447e-08 x + 4.533358442590547e-05
error 1.6692637054937939e-09
p =  1.6078008523125964e-114
pendiente significativa
R^2 0.6811910446622308
y = f(x) = -9.132331371853938e-10 x + 1.3202325787207294e-06
error 1.431149104840528e-10
p =  4.359158383428425e-10
pendiente significativa
R^2 0.08264080526994147
y = f(x) = 3.638834463785146e-10 x + 1.1174278631513553e-06
error 1.1201987356227443e-10
p =  0.001257639893824037
pendiente significativa
R^2 0.025453964627936122
In [486]:
for name, group in cieG:
    
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.detrend.html
    detrended = signal.detrend(group.casos)
    
    a, b, r, p, e = stats.linregress(group['sem'], group.casos)
    print("y = f(x) = {} x + {}".format(a, b))
    print("error", e)
    print("p = ", p)
    print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
    print("R^2", r**2)
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos)
    plt.plot(group['sem'], detrended, c='black')
    plt.plot(group['sem'], (a * group['sem'] + b), label = 'y = {:.1f}x + {:.0f}'.format(a, b), color = 'red', linewidth = 3)
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Casos normalizados")
    plt.show()
    
    # https://stackoverflow.com/questions/48497756/time-series-distance-metric
    plt.figure(figsize=(12, 2))
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.cumsum.html
    plt.plot(group['sem'], group.casos.cumsum(), c='green')
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Acumulado de Casos normalizados")
    plt.show()
    
    plt.figure()
    # https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/
    plot_acf(detrended, lags=52)
    # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
    plt.title(name)
    plt.xlabel("Retraso en semanas")
    plt.ylabel('Correlación')
    plt.show()
y = f(x) = -2.3224356070873255e-07 x + 0.00018642953885186607
error 2.458069138569354e-08
p =  4.2189376399252386e-21
pendiente significativa
R^2 0.008834750473414046
<Figure size 432x288 with 0 Axes>
y = f(x) = -1.9503026794867653e-08 x + 2.1003475197080275e-05
error 2.9914826904487065e-09
p =  7.747452841909533e-11
pendiente significativa
R^2 0.008415817644051377
<Figure size 432x288 with 0 Axes>
y = f(x) = -7.687412126573934e-08 x + 6.301895314999713e-05
error 1.1457310486128478e-07
p =  0.5026730466383222
pendiente no significativa
R^2 0.0012420716051614582
<Figure size 432x288 with 0 Axes>
y = f(x) = 4.324626671299835e-10 x + 2.04941342972253e-07
error 7.239655510808932e-11
p =  4.1887790791163235e-09
pendiente significativa
R^2 0.06134442012808605
<Figure size 432x288 with 0 Axes>
y = f(x) = 6.114012516943938e-08 x + -9.37021948204884e-06
error 5.454249521195378e-09
p =  7.943541732517958e-27
pendiente significativa
R^2 0.1593291079485521
<Figure size 432x288 with 0 Axes>
y = f(x) = -5.4762876304630354e-06 x + 0.004047543918036931
error 6.313606290646645e-07
p =  1.2891385768087959e-17
pendiente significativa
R^2 0.057202405083796086
<Figure size 432x288 with 0 Axes>
y = f(x) = 3.0607452607643954e-14 x + 8.632235917582137e-10
error 1.081125720208934e-12
p =  0.977427221531179
pendiente no significativa
R^2 1.8298987789071008e-06
<Figure size 432x288 with 0 Axes>
y = f(x) = -4.552145867367468e-08 x + 4.150055938885172e-05
error 5.28039387715845e-09
p =  1.7027198403366345e-17
pendiente significativa
R^2 0.04850098103773122
<Figure size 432x288 with 0 Axes>
y = f(x) = -5.496981234999186e-10 x + 8.572895488021172e-07
error 2.657552949170129e-10
p =  0.038856309457124164
pendiente significativa
R^2 0.004272969521096534
<Figure size 432x288 with 0 Axes>
y = f(x) = -3.11383856620073e-08 x + 3.0350821580836582e-05
error 3.4613396780786333e-09
p =  2.313921331364859e-18
pendiente significativa
R^2 0.10649538120403663
<Figure size 432x288 with 0 Axes>
y = f(x) = -9.132331371853938e-10 x + 1.3202325787207294e-06
error 1.431149104840528e-10
p =  4.359158383428425e-10
pendiente significativa
R^2 0.08264080526994147
<Figure size 432x288 with 0 Axes>
y = f(x) = 3.638834463785146e-10 x + 1.1174278631513553e-06
error 1.1201987356227443e-10
p =  0.001257639893824037
pendiente significativa
R^2 0.025453964627936122
<Figure size 432x288 with 0 Axes>

Se extraen las características de cada CIE en tanto serie de tiempo. A saber, su pendiente, ordenada en el origen y las autocorrelaciones con retraso de 1 a 52 semanas (eliminando el retraso de 0 semanas)

In [491]:
ciesF = pd.DataFrame(ciesF)

# https://stackoverflow.com/a/11346337
colNames = ['m', 'b']
for i in range (53):
    colNames.append('ac' + str(i))
colNames.append('cie')
ciesF.columns = colNames

ciesF = ciesF.drop(['ac0'], axis=1)

ciesF.sort_values(by=['m'], ascending=False)
Out[491]:
m b ac1 ac2 ac3 ac4 ac5 ac6 ac7 ac8 ... ac44 ac45 ac46 ac47 ac48 ac49 ac50 ac51 ac52 cie
38 3.638834e-10 1.117428e-06 0.497210 0.371337 0.338770 0.295026 0.246889 0.170797 0.177532 0.174927 ... 0.095734 0.079953 0.094433 0.045073 0.020609 0.002565 -0.025604 0.001100 0.014393 z21
11 2.725401e-10 1.947195e-08 0.089659 -0.019229 -0.025867 -0.002608 -0.028985 -0.001130 -0.058919 -0.019433 ... -0.034949 -0.062431 -0.002219 0.029734 0.005434 -0.050439 0.078625 0.029168 -0.002898 a37
20 4.306189e-11 4.531178e-07 0.299805 0.225857 0.068495 0.057942 0.001655 0.017533 0.033656 0.100137 ... 0.050684 0.016318 0.061034 0.053793 0.021663 0.059753 -0.005982 0.033164 -0.047455 b17.1
0 4.268750e-11 -5.612765e-09 0.393749 0.122484 0.010328 0.026661 0.015698 0.021141 -0.002905 -0.010120 ... -0.017536 -0.017607 -0.017679 -0.017751 -0.017822 -0.017893 -0.017965 -0.018036 -0.018107 a00
8 3.893878e-11 4.432440e-08 0.393243 0.260146 0.044702 0.032172 0.012605 0.025166 0.014480 0.009281 ... -0.017366 0.067969 0.005142 0.003701 -0.020010 0.007456 0.042142 0.077540 0.090818 a27
9 2.343471e-13 -3.600315e-11 -0.007522 -0.007561 -0.007600 -0.007639 -0.007678 -0.007717 -0.007756 -0.007795 ... -0.000592 -0.000605 -0.000618 -0.000631 -0.000644 -0.000657 -0.000670 -0.000683 -0.000696 a33
30 3.060745e-14 8.632236e-10 0.055952 0.062823 0.076371 0.006623 0.030863 -0.037294 -0.060305 0.059805 ... -0.057127 -0.057262 -0.035717 -0.035852 -0.057666 -0.057801 -0.057936 -0.058070 -0.058205 p35.0
24 -1.067385e-11 4.355251e-09 0.077117 0.322187 0.046591 0.036037 -0.023600 0.020128 -0.018800 -0.026529 ... -0.023442 0.017590 -0.018073 -0.018183 -0.023882 0.032515 -0.001876 0.011733 0.004771 b50
6 -1.462773e-11 4.515318e-08 0.085993 0.048897 0.064574 0.118768 -0.023565 -0.026439 0.023959 -0.036252 ... 0.064103 0.019108 -0.021642 -0.019077 0.015544 0.031099 -0.056185 -0.073401 -0.004459 a17.0
35 -2.343882e-11 1.596244e-07 0.550097 0.512561 0.454005 0.411662 0.363330 0.254477 0.225435 0.181761 ... -0.032387 -0.007867 0.004137 -0.020714 0.015187 -0.003246 0.008389 0.020997 0.018492 u98
10 -3.423410e-11 1.862211e-08 -0.081445 0.057375 -0.049496 0.012841 0.056102 -0.020204 -0.009801 -0.032175 ... -0.005745 0.005151 -0.043027 0.056617 -0.031916 0.114628 -0.035333 0.052850 -0.016788 a34
14 -4.514430e-11 5.003110e-08 0.076912 0.149122 0.088945 0.072881 0.114257 0.074084 0.108017 0.108573 ... 0.036326 0.015114 0.051616 0.032731 -0.039804 0.002215 0.025619 0.016111 -0.024214 a50
13 -1.074456e-10 5.326971e-08 0.003941 -0.012528 -0.008077 -0.013007 -0.007537 -0.016467 0.003030 -0.001938 ... -0.011364 -0.003043 -0.003882 0.012084 0.000458 -0.001701 0.007212 -0.004964 -0.004440 a39.0
19 -1.241134e-10 2.116017e-07 0.289203 0.226359 0.276856 0.274657 0.253692 0.247411 0.247509 0.206441 ... 0.021663 0.093129 0.023837 0.146601 0.018644 0.036760 0.043459 -0.015475 -0.004356 b16
7 -1.764268e-10 6.894953e-07 0.522652 0.363609 0.361372 0.385651 0.326709 0.325553 0.318398 0.252245 ... 0.196099 0.182101 0.163183 0.167669 0.235813 0.206676 0.215066 0.167751 0.183912 a23
26 -2.654424e-10 3.204952e-07 0.110847 0.069672 0.107836 0.080559 0.128797 -0.027006 0.042914 -0.009807 ... 0.029668 0.076565 0.089312 0.024519 0.008848 0.059614 0.099755 0.046717 -0.020862 g00-g03
17 -2.722126e-10 1.262294e-07 0.488103 0.474707 0.426456 0.414057 0.317635 0.304248 0.261418 0.248762 ... 0.072010 0.087184 0.117220 0.163608 0.087383 0.129308 0.128205 0.131949 0.139945 b06
27 -4.556172e-10 3.750011e-07 0.232941 0.165279 0.155966 0.155952 0.159776 0.110074 0.261158 0.168664 ... 0.016427 0.004149 -0.044051 0.007307 -0.018886 -0.029089 0.021237 0.039481 0.027578 i00-i02
31 -7.142618e-10 1.178303e-06 0.791010 0.750958 0.691090 0.610929 0.512450 0.434146 0.366592 0.258712 ... 0.341533 0.408299 0.435233 0.469282 0.511489 0.522807 0.498786 0.480615 0.470837 t60
34 -8.130422e-10 1.482782e-06 0.026306 0.067629 0.068971 0.106583 0.028512 0.007521 0.073457 -0.032228 ... 0.055582 0.128333 0.022517 0.066484 0.006181 0.028253 0.168174 0.105951 0.083272 u97
37 -9.132331e-10 1.320233e-06 0.826727 0.781848 0.727522 0.675235 0.624828 0.550919 0.484088 0.401629 ... 0.459125 0.506974 0.547571 0.601010 0.631255 0.670476 0.649844 0.640989 0.621946 x20
21 -9.851453e-10 1.437287e-06 0.149215 0.272471 0.152501 0.089044 0.100963 0.106820 0.087191 -0.016662 ... 0.075923 0.128809 0.068058 0.029803 0.013137 -0.020469 -0.029275 -0.055377 -0.035842 b20-b24
25 -1.183463e-09 7.100912e-07 0.388003 0.416624 0.313638 0.387392 0.284052 0.250379 0.164883 0.231499 ... 0.188028 0.150510 0.140413 0.147020 0.207482 0.171138 0.209809 0.209396 0.200214 b51
15 -2.875762e-09 8.658057e-06 0.895262 0.842421 0.792705 0.707147 0.622664 0.527720 0.432752 0.356828 ... 0.243528 0.298282 0.370211 0.423460 0.466082 0.511227 0.539638 0.552434 0.540235 a90
2 -3.075234e-09 1.290497e-05 0.667701 0.505751 0.436771 0.352219 0.289719 0.228429 0.172198 0.107678 ... 0.223528 0.189754 0.185963 0.183744 0.179380 0.151920 0.143924 0.134758 0.057089 a01.0
5 -3.831012e-09 4.559111e-06 0.380632 0.195406 0.107774 0.091505 0.073293 0.073178 0.027202 0.086867 ... 0.121356 0.134870 0.096912 0.037600 0.126218 0.126044 0.177925 0.161334 0.038959 a15-a16
12 -4.255464e-09 2.747090e-06 0.140500 0.121196 0.127669 0.099854 0.073198 0.027292 -0.002968 -0.016134 ... 0.035622 0.029450 0.045802 0.047865 0.060802 0.095082 0.062623 0.058167 0.053912 a38
22 -4.597787e-09 2.970356e-06 0.564047 0.398251 0.335638 0.284887 0.179178 0.078869 0.009511 -0.045524 ... 0.070795 0.069166 0.100421 0.164831 0.185766 0.150027 0.154843 0.220636 0.151552 b26
18 -7.433435e-09 6.901512e-06 0.765747 0.587239 0.516973 0.418962 0.348350 0.239331 0.135074 0.112791 ... 0.326428 0.312633 0.307638 0.296214 0.237261 0.190637 0.124537 0.059086 0.007564 b15
4 -9.213574e-09 1.295795e-05 0.605797 0.479571 0.410353 0.367218 0.341321 0.311389 0.234742 0.219977 ... 0.255961 0.229158 0.221748 0.178246 0.191535 0.225850 0.294085 0.313643 0.295370 a05
3 -1.078567e-08 6.155839e-06 0.620012 0.595386 0.535631 0.462663 0.438088 0.373148 0.289653 0.300332 ... 0.178095 0.234208 0.255625 0.278374 0.284554 0.274991 0.301086 0.266200 0.231869 a03
32 -1.315899e-08 1.643445e-05 0.903920 0.826842 0.732156 0.614444 0.497397 0.376862 0.268490 0.160477 ... 0.183030 0.131180 0.085666 0.037463 -0.008971 -0.066770 -0.114970 -0.152466 -0.191697 t63 excepto t63.2
23 -3.141197e-08 1.115938e-04 0.767118 0.547797 0.395471 0.291686 0.210417 0.128438 0.081658 0.051735 ... -0.017768 -0.024711 -0.028894 -0.060319 -0.058954 -0.048837 -0.020363 -0.004847 -0.005577 b30
29 -4.387659e-08 5.089286e-05 0.927733 0.858740 0.790416 0.727838 0.637424 0.545003 0.449916 0.365855 ... 0.569450 0.607758 0.624284 0.633800 0.647336 0.649295 0.625177 0.589587 0.547403 j12
33 -5.001284e-08 8.796233e-05 0.881185 0.793629 0.694008 0.596382 0.456681 0.319222 0.189304 0.062305 ... 0.314212 0.388623 0.451051 0.497185 0.527848 0.531338 0.510950 0.475725 0.426713 t63.2
36 -5.193298e-08 4.533358e-05 0.844756 0.768593 0.723830 0.666654 0.614451 0.564508 0.510689 0.441227 ... 0.355990 0.405624 0.456501 0.503929 0.521726 0.555444 0.542070 0.525043 0.500933 w54
16 -1.464468e-07 1.121105e-04 0.946239 0.892301 0.837622 0.778140 0.711102 0.629254 0.536098 0.431731 ... 0.530872 0.565769 0.593141 0.613168 0.623759 0.618582 0.598455 0.570398 0.530427 b01
1 -1.765308e-06 1.975491e-03 0.831565 0.690654 0.594390 0.516446 0.437316 0.326857 0.221760 0.104962 ... 0.289442 0.214354 0.137897 0.059001 -0.016753 -0.061065 -0.108941 -0.158795 -0.225823 a01-a03
28 -6.932315e-06 8.805968e-03 0.853479 0.727355 0.620092 0.535945 0.445607 0.349639 0.267460 0.175938 ... 0.200076 0.214129 0.232129 0.213833 0.204222 0.194258 0.186803 0.151652 0.122209 j00-j06

39 rows × 55 columns

Se agrupan las CIEs por casos dados en una semana, como series de tiempo.

In [492]:
ciesTS = pd.DataFrame(ciesTSt)
t = list(cie.groups.keys())
t.remove('a92.3')
ciesTS['cie'] = t

ciesTS.sample(3)
Out[492]:
0 1 2 3 4 5 6 7 8 9 ... 499 500 501 502 503 504 505 506 507 cie
12 3.413076e-06 2.295700e-06 2.031593e-06 2.620755e-06 3.331812e-06 3.372444e-06 3.331812e-06 4.347608e-06 3.392760e-06 3.453708e-06 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN a38
20 4.875822e-07 5.078982e-07 4.063185e-07 4.266345e-07 4.266345e-07 6.297937e-07 4.875822e-07 6.501096e-07 3.656867e-07 3.453708e-07 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN b17.1
25 7.110574e-07 7.110574e-07 1.015796e-07 9.548485e-07 1.422115e-07 3.047389e-07 2.031593e-07 2.641070e-07 5.891619e-07 1.828433e-07 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN b51

3 rows × 509 columns

Pronóstico

Una vez eliminada la tendencia se puede comprobar que las series de tiempo para cada enfermedad son estacionales

In [493]:
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries, w, name):
    
    #Determing rolling statistics
    rolmean = timeseries.rolling(w).mean()
    rolstd = timeseries.rolling(w).std()

    #Plot rolling statistics:
    orig = plt.plot(timeseries,label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title(name)
    plt.show(block=False)
    
    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print(dfoutput)
    
for i in range(len(ciesTS)):
    temp = ciesTS.iloc[i, : 508]
    temp.dropna(inplace=True)
    test_stationarity(temp, 52, ciesTS.iloc[i, -1])
Results of Dickey-Fuller Test:
Test Statistic                -1.356054e+01
p-value                        2.314720e-25
#Lags Used                     0.000000e+00
Number of Observations Used    4.300000e+02
Critical Value (1%)           -3.445649e+00
Critical Value (5%)           -2.868285e+00
Critical Value (10%)          -2.570363e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -3.424746
p-value                          0.010144
#Lags Used                       2.000000
Number of Observations Used    348.000000
Critical Value (1%)             -3.449282
Critical Value (5%)             -2.869881
Critical Value (10%)            -2.571214
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -5.719534e+00
p-value                        6.993272e-07
#Lags Used                     2.000000e+00
Number of Observations Used    4.150000e+02
Critical Value (1%)           -3.446206e+00
Critical Value (5%)           -2.868530e+00
Critical Value (10%)          -2.570493e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -2.106851
p-value                          0.241769
#Lags Used                       7.000000
Number of Observations Used    461.000000
Critical Value (1%)             -3.444615
Critical Value (5%)             -2.867830
Critical Value (10%)            -2.570120
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -3.760647
p-value                          0.003337
#Lags Used                       4.000000
Number of Observations Used    476.000000
Critical Value (1%)             -3.444163
Critical Value (5%)             -2.867631
Critical Value (10%)            -2.570014
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -2.133072
p-value                          0.231426
#Lags Used                      10.000000
Number of Observations Used    461.000000
Critical Value (1%)             -3.444615
Critical Value (5%)             -2.867830
Critical Value (10%)            -2.570120
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -8.720275e+00
p-value                        3.400303e-14
#Lags Used                     3.000000e+00
Number of Observations Used    4.700000e+02
Critical Value (1%)           -3.444340e+00
Critical Value (5%)           -2.867709e+00
Critical Value (10%)          -2.570056e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -4.078834
p-value                          0.001050
#Lags Used                       5.000000
Number of Observations Used    435.000000
Critical Value (1%)             -3.445473
Critical Value (5%)             -2.868207
Critical Value (10%)            -2.570321
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -1.063365e+01
p-value                        5.137862e-19
#Lags Used                     2.000000e+00
Number of Observations Used    4.810000e+02
Critical Value (1%)           -3.444018e+00
Critical Value (5%)           -2.867568e+00
Critical Value (10%)          -2.569980e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                 -21.517435
p-value                          0.000000
#Lags Used                       0.000000
Number of Observations Used    463.000000
Critical Value (1%)             -3.444553
Critical Value (5%)             -2.867803
Critical Value (10%)            -2.570106
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -1.468685e+01
p-value                        3.093388e-27
#Lags Used                     1.000000e+00
Number of Observations Used    5.060000e+02
Critical Value (1%)           -3.443340e+00
Critical Value (5%)           -2.867269e+00
Critical Value (10%)          -2.569821e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -3.809081
p-value                          0.002816
#Lags Used                      11.000000
Number of Observations Used    445.000000
Critical Value (1%)             -3.445131
Critical Value (5%)             -2.868057
Critical Value (10%)            -2.570241
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -5.053787
p-value                          0.000017
#Lags Used                       4.000000
Number of Observations Used    468.000000
Critical Value (1%)             -3.444400
Critical Value (5%)             -2.867736
Critical Value (10%)            -2.570070
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                 -20.908062
p-value                          0.000000
#Lags Used                       0.000000
Number of Observations Used    449.000000
Critical Value (1%)             -3.444998
Critical Value (5%)             -2.867999
Critical Value (10%)            -2.570210
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -2.973026
p-value                          0.037491
#Lags Used                      11.000000
Number of Observations Used    447.000000
Critical Value (1%)             -3.445064
Critical Value (5%)             -2.868028
Critical Value (10%)            -2.570226
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -5.304842
p-value                          0.000005
#Lags Used                      12.000000
Number of Observations Used    488.000000
Critical Value (1%)             -3.443821
Critical Value (5%)             -2.867481
Critical Value (10%)            -2.569934
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -6.501620e+00
p-value                        1.158034e-08
#Lags Used                     9.000000e+00
Number of Observations Used    4.630000e+02
Critical Value (1%)           -3.444553e+00
Critical Value (5%)           -2.867803e+00
Critical Value (10%)          -2.570106e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -1.996208
p-value                          0.288238
#Lags Used                      11.000000
Number of Observations Used    433.000000
Critical Value (1%)             -3.445543
Critical Value (5%)             -2.868238
Critical Value (10%)            -2.570338
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -3.403355
p-value                          0.010845
#Lags Used                       8.000000
Number of Observations Used    376.000000
Critical Value (1%)             -3.447862
Critical Value (5%)             -2.869258
Critical Value (10%)            -2.570881
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -3.818212
p-value                          0.002726
#Lags Used                       6.000000
Number of Observations Used    433.000000
Critical Value (1%)             -3.445543
Critical Value (5%)             -2.868238
Critical Value (10%)            -2.570338
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -9.927614e+00
p-value                        2.878961e-17
#Lags Used                     1.000000e+00
Number of Observations Used    3.980000e+02
Critical Value (1%)           -3.446888e+00
Critical Value (5%)           -2.868829e+00
Critical Value (10%)          -2.570653e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -7.628780e+00
p-value                        2.034981e-11
#Lags Used                     2.000000e+00
Number of Observations Used    4.000000e+02
Critical Value (1%)           -3.446804e+00
Critical Value (5%)           -2.868793e+00
Critical Value (10%)          -2.570634e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -1.383949
p-value                          0.589947
#Lags Used                      14.000000
Number of Observations Used    408.000000
Critical Value (1%)             -3.446480
Critical Value (5%)             -2.868650
Critical Value (10%)            -2.570557
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -6.469065e+00
p-value                        1.381678e-08
#Lags Used                     1.000000e+00
Number of Observations Used    3.080000e+02
Critical Value (1%)           -3.451761e+00
Critical Value (5%)           -2.870970e+00
Critical Value (10%)          -2.571794e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -1.049502e+01
p-value                        1.121338e-18
#Lags Used                     1.000000e+00
Number of Observations Used    4.900000e+02
Critical Value (1%)           -3.443766e+00
Critical Value (5%)           -2.867457e+00
Critical Value (10%)          -2.569921e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -3.050080
p-value                          0.030475
#Lags Used                       8.000000
Number of Observations Used    471.000000
Critical Value (1%)             -3.444310
Critical Value (5%)             -2.867696
Critical Value (10%)            -2.570049
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -4.194474
p-value                          0.000673
#Lags Used                       8.000000
Number of Observations Used    435.000000
Critical Value (1%)             -3.445473
Critical Value (5%)             -2.868207
Critical Value (10%)            -2.570321
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -5.300617
p-value                          0.000005
#Lags Used                      17.000000
Number of Observations Used    448.000000
Critical Value (1%)             -3.445031
Critical Value (5%)             -2.868013
Critical Value (10%)            -2.570218
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -5.196448
p-value                          0.000009
#Lags Used                       0.000000
Number of Observations Used    422.000000
Critical Value (1%)             -3.445941
Critical Value (5%)             -2.868413
Critical Value (10%)            -2.570431
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -6.052569e+00
p-value                        1.267267e-07
#Lags Used                     1.300000e+01
Number of Observations Used    4.740000e+02
Critical Value (1%)           -3.444221e+00
Critical Value (5%)           -2.867657e+00
Critical Value (10%)          -2.570028e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -1.053776e+01
p-value                        8.810400e-19
#Lags Used                     2.000000e+00
Number of Observations Used    4.370000e+02
Critical Value (1%)           -3.445403e+00
Critical Value (5%)           -2.868177e+00
Critical Value (10%)          -2.570305e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -5.316712
p-value                          0.000005
#Lags Used                      14.000000
Number of Observations Used    451.000000
Critical Value (1%)             -3.444933
Critical Value (5%)             -2.867970
Critical Value (10%)            -2.570195
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -4.113528
p-value                          0.000920
#Lags Used                       4.000000
Number of Observations Used    269.000000
Critical Value (1%)             -3.454896
Critical Value (5%)             -2.872345
Critical Value (10%)            -2.572528
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -4.874372
p-value                          0.000039
#Lags Used                      11.000000
Number of Observations Used    452.000000
Critical Value (1%)             -3.444900
Critical Value (5%)             -2.867956
Critical Value (10%)            -2.570187
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -5.375052
p-value                          0.000004
#Lags Used                       8.000000
Number of Observations Used    474.000000
Critical Value (1%)             -3.444221
Critical Value (5%)             -2.867657
Critical Value (10%)            -2.570028
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -5.342202
p-value                          0.000004
#Lags Used                       5.000000
Number of Observations Used    458.000000
Critical Value (1%)             -3.444709
Critical Value (5%)             -2.867871
Critical Value (10%)            -2.570142
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                  -1.813468
p-value                          0.373796
#Lags Used                       2.000000
Number of Observations Used    452.000000
Critical Value (1%)             -3.444900
Critical Value (5%)             -2.867956
Critical Value (10%)            -2.570187
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -6.673468e+00
p-value                        4.526341e-09
#Lags Used                     1.100000e+01
Number of Observations Used    4.420000e+02
Critical Value (1%)           -3.445232e+00
Critical Value (5%)           -2.868101e+00
Critical Value (10%)          -2.570265e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic                -6.644475e+00
p-value                        5.308127e-09
#Lags Used                     2.000000e+00
Number of Observations Used    4.030000e+02
Critical Value (1%)           -3.446681e+00
Critical Value (5%)           -2.868739e+00
Critical Value (10%)          -2.570605e+00
dtype: float64

Por ello es posible pronosticarlas con el método de Holt-Winter

In [494]:
# https://www.analyticsvidhya.com/blog/2018/02/time-series-forecasting-methods/
# https://machinelearningmastery.com/time-series-forecasting-methods-in-python-cheat-sheet/
# https://towardsdatascience.com/time-series-in-python-exponential-smoothing-and-arima-processes-2c67f2a52788

from statsmodels.tsa.api import Holt, SimpleExpSmoothing, ExponentialSmoothing

pronosticos = []
real = []
for i in range(len(ciesTS)):
    temp = ciesTS.iloc[i, : 508]
    temp.dropna(inplace=True)
    
    # https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1
    # Train = 0.7
    train = round(0.7 * len(temp))
    f = ExponentialSmoothing(np.asarray(temp.iloc[0:train])).fit(smoothing_level = 0.1)
    # https://stackoverflow.com/a/50786171
    pred = f.predict(start=train + 1, end=len(temp))
    fcast = f.forecast(len(temp) - train)
    plt.figure(figsize=(12, 4))
    plt.plot(temp)
    plt.plot(f.fittedvalues, c='black')
    plt.plot(range(train, len(temp)), fcast, c='red')
    #plt.plot(range(train, len(temp)), pred, c='red')
    plt.title(ciesTS.iloc[i, 508])
    plt.legend(["Serie de tiempo", "Ajuste de Holt-Winter", "Pronóstico de Holt-Winter"])
    plt.show()
    
    
    # https://stackoverflow.com/a/15863028
    real.append(temp.iloc[-1])
    pronosticos.append(fcast[0])

Que presenta muy buen ajuste respecto a los datos reales

In [417]:
a, b, r, p, e = stats.linregress(real, pronosticos)
print("y = f(x) = {:.4f} x + {:.4f}".format(a, b))
print("error", e)
print("p = ", p)
print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
print("R^2", r**2)

plt.title('Logaritmo del pronóstico', fontsize = 20)
plt.xlabel('Logaritmo del último caso normalizado')
plt.ylabel('Pronóstico')
plt.scatter(real, pronosticos)
plt.show()
y = f(x) = 1.1122 x + 0.0000
error 0.019116083562680818
p =  5.559074078899032e-38
pendiente significativa
R^2 0.9891871972161727

Lo que se evidencia claramente al utilizar escala logarítmica en los datos

In [536]:
plt.title('Logaritmo del pronóstico', fontsize = 20)
plt.xlabel('Logaritmo del último caso normalizado')
plt.ylabel('Pronóstico')
plt.scatter(np.log(real), np.log(pronosticos))
plt.show()
C:\Users\bena8\AppData\Local\Programs\Python\Python37-32\lib\site-packages\ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in log
  after removing the cwd from sys.path.
In [526]:
# https://machinelearningmastery.com/time-series-forecast-uncertainty-using-confidence-intervals-python/
# https://machinelearningmastery.com/make-sample-forecasts-arima-python/
from statsmodels.tsa.arima_model import ARIMA

def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return pd.Series(diff)

for i in range(len(ciesTS)):
    temp = ciesTS.iloc[i, : 508]
    temp.dropna(inplace=True)
    train = round(0.9 * len(temp))
    ts = difference(np.asarray(temp), interval=len(temp) - train - 1)
    m = ARIMA(ts, order=(3, 1, 0))  
    f = m.fit(trend = 'nc', disp = 0)
    pred = f.predict(start=train + 1, end=len(temp))
    tempDiff = temp - temp.shift()
    plt.figure(figsize=(16, 4))
    plt.plot(tempDiff)
    plt.plot(f.fittedvalues, color='red')
    plt.plot(range(train, len(temp)), pred, c='black')
    plt.legend(['Diferencia', 'Valores ajustados de ARIMA', 'Pronóstico de ARIMA'])
    plt.show()